package DOM;

import java.io.File;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Scratch driver for trying out jsoup CSS selectors against a saved article page.
 */
public class test {

	/**
	 * Parses a locally saved HTML article and prints its title and its category.
	 *
	 * <p>The title is the text of the elements matching {@code div#article > h1}.
	 * The category is the text of every {@code h2.sitetitle} element, concatenated
	 * in document order without any separator, and printed with a {@code "cate = "}
	 * prefix.
	 *
	 * <p>If the file cannot be read, the stack trace is printed and the method
	 * returns without printing a title or category.
	 */
	public static void testParentChildCss(){
		Document doc;
		try {
			// Read the saved page from local disk, decoding it as UTF-8.
			doc = Jsoup.parse(new File("/home/hour/Documents/aticle collection/khmer.voanews.com/content/a-40-2009-09-07-voa19-90178787/1141362.html"), "UTF-8");
		} catch (IOException e) {
			// Without a parsed document there is nothing to select from; stop here
			// rather than falling through and dereferencing a null document.
			e.printStackTrace();
			return;
		}

		// get title
		Elements titlelinks = doc.select("div#article > h1");
		String title = titlelinks.text();
		System.out.println(title);

		// get category: join the text of each match with no separator between them
		Elements catelinks = doc.select("h2.sitetitle");
		StringBuilder cate = new StringBuilder();
		for (Element element : catelinks) {
			cate.append(element.text());
		}
		System.out.println("cate = "+cate);
	}

	/**
	 * Entry point. Currently prints {@code Constant.RegEnglish} (defined outside
	 * this file); the selector experiment is left disabled.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String [] args){
		// Selector experiment is disabled for now; re-enable to run it.
//		testParentChildCss();
		System.out.println(Constant.RegEnglish);
	}

}
